home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.4)
-
- import math
- from spambayes.Options import options
-
- try:
- (True, False)
- except NameError:
- (True, False) = (1, 0)
-
-
class Hist:
    '''Simple histograms of float values.

    Values are collected with add() (or merged with +=).  Summary
    statistics are computed lazily by compute_stats() and cached until
    the data changes; display() prints the statistics followed by a
    text histogram.

    Attributes set by compute_stats() when at least one value is present:
    n, min, max, median, mean, var, sdev and pct (a list of
    (percentile, score) pairs).  When there is no data only n is set.
    '''

    def __init__(self, nbuckets = options[('TestDriver', 'nbuckets')], lo = 0.0, hi = 100.0):
        '''Create an empty histogram.

        nbuckets -- number of buckets used by fill_buckets()/display().
        lo, hi   -- range covered by the buckets.  Passing None for either
                    makes get_lo_hi() substitute the smallest / largest
                    value seen in the data.
        '''
        self.lo = lo
        self.hi = hi
        self.nbuckets = nbuckets
        self.buckets = [0] * nbuckets
        self.data = []
        self.stats_uptodate = False

    def add(self, x):
        '''Record one value and invalidate the cached statistics.'''
        self.data.append(x)
        self.stats_uptodate = False

    def compute_stats(self):
        '''Compute n, min, max, median, mean, var, sdev and pct.

        Does nothing if the statistics are already up to date.  With no
        data only self.n (== 0) is set.  Sorts self.data in place.

        Raises AssertionError if a configured percentile lies outside
        0..100.
        '''
        if self.stats_uptodate:
            return None
        self.stats_uptodate = True

        data = self.data
        n = self.n = len(data)
        if n == 0:
            return None

        data.sort()
        self.min = data[0]
        self.max = data[-1]
        if n & 1:
            self.median = data[n // 2]
        else:
            # Even count: average the two middle values.
            self.median = (data[n // 2] + data[(n - 1) // 2]) / 2.0

        total = 0.0
        for x in data:
            total += x
        mean = self.mean = total / n

        # Population variance (divides by n, not n - 1).
        var = 0.0
        for x in data:
            d = x - mean
            var += d * d
        self.var = var / n
        self.sdev = math.sqrt(self.var)

        self.pct = pct = []
        last = n - 1
        for p in options[('TestDriver', 'percentiles')]:
            if not 0.0 <= p <= 100.0:
                raise AssertionError('percentile %r not in 0..100' % (p,))
            # Going from index 0 to index n-1 takes n-1 steps; p percent
            # of the way along is therefore (n-1)*p/100.
            i = last * p / 100.0
            whole = int(i)
            frac = i - whole
            score = data[whole]
            if frac and whole < last:
                # Move frac of the way from this score to the next one.
                score += frac * (data[whole + 1] - score)
            pct.append((p, score))

    def __iadd__(self, other):
        '''Merge the data of another Hist into this one (self += other).'''
        self.data.extend(other.data)
        self.stats_uptodate = False
        return self

    def get_lo_hi(self):
        '''Return the (lo, hi) range covered by the buckets.

        A bound that was given as None is replaced by the data's min or
        max.  NOTE: those attributes only exist once compute_stats() has
        seen at least one value.
        '''
        self.compute_stats()
        lo = self.lo
        hi = self.hi
        if lo is None:
            lo = self.min
        if hi is None:
            hi = self.max
        return (lo, hi)

    def get_bucketwidth(self):
        '''Return the width of one bucket as a float.'''
        (lo, hi) = self.get_lo_hi()
        span = float(hi - lo)
        return span / self.nbuckets

    def fill_buckets(self, nbuckets = None):
        '''Distribute the data over nbuckets equal-width buckets.

        nbuckets defaults to self.nbuckets; the value used is stored back
        in self.nbuckets and the counts in self.buckets.  Values outside
        [lo, hi) are counted in the first or last bucket.

        Raises ValueError if nbuckets is not positive.
        '''
        if nbuckets is None:
            nbuckets = self.nbuckets
        if nbuckets <= 0:
            raise ValueError('nbuckets %g > 0 required' % nbuckets)

        self.nbuckets = nbuckets
        self.buckets = buckets = [0] * nbuckets
        (lo, hi) = self.get_lo_hi()
        bucketwidth = self.get_bucketwidth()
        top = nbuckets - 1
        for x in self.data:
            i = int((x - lo) / bucketwidth)
            # Clamp out-of-range values into the end buckets.
            if i > top:
                i = top
            elif i < 0:
                i = 0
            buckets[i] += 1

    def display(self, nbuckets = None, WIDTH = 61):
        '''Print summary statistics and a text histogram to stdout.

        nbuckets -- number of buckets to show (default self.nbuckets).
        WIDTH    -- maximum number of '*' characters in a bar.

        Prints nothing when there is no data; prints only the statistics
        when lo > hi.  Raises ValueError if nbuckets is not positive.
        '''
        if nbuckets is None:
            nbuckets = self.nbuckets
        if nbuckets <= 0:
            raise ValueError('nbuckets %g > 0 required' % nbuckets)

        self.compute_stats()
        n = self.n
        if n == 0:
            return None

        # Each print() below is given exactly one string, so the output is
        # the same whether print is a statement or a function.
        print('%d items; mean %.2f; sdev %.2f' % (n, self.mean, self.sdev))
        print('-> <stat> min %g; median %g; max %g' % (self.min, self.median, self.max))
        pcts = ['%g%% %g' % x for x in self.pct]
        print('-> <stat> percentiles: ' + '; '.join(pcts))

        (lo, hi) = self.get_lo_hi()
        if lo > hi:
            return None

        self.fill_buckets(nbuckets)
        biggest = max(self.buckets)
        # hunit is the number of items one '*' stands for, rounded up so
        # that the longest bar fits in WIDTH characters.
        (hunit, r) = divmod(biggest, WIDTH)
        if r:
            hunit += 1
        print('* = %s items' % (hunit,))

        # Column widths: enough digits for the largest bucket boundary and
        # for the largest count.
        ndigits = len(str(biggest))
        bucketwidth = self.get_bucketwidth()
        whole_digits = max(len(str(int(lo))), len(str(int(hi - bucketwidth))))
        # Show as many fractional digits as are needed to tell adjacent
        # bucket boundaries apart.
        frac_digits = 0
        scaled = bucketwidth
        while scaled < 1.0:
            frac_digits += 1
            scaled *= 10.0
        row_format = ('%' + str(whole_digits + 1 + frac_digits) + '.'
                      + str(frac_digits) + 'f %' + str(ndigits) + 'd')

        for (i, count) in enumerate(self.buckets):
            stars = '*' * ((count + hunit - 1) // hunit)
            print(row_format % (lo + i * bucketwidth, count) + ' ' + stars)
-
-
-
-